Generate data with imperfections

Fit normal HMM with 2 states

Try normal model with 3 states

model <- stan_model("hmm.stan")

data_stan <- list(
  N=nrow(df),
  dist=df$obs,
  K=3,
  cost=10,
  p_anomalous=0.01,
  conc_anomalous=100
)

fit <- optimise_repeat(5, model, data_stan)

df$state_est <- fit$par$state
df$state_est[1] <- 1

g <- df %>%
  mutate(state_est=as.factor(state_est)) %>% 
  ggplot(aes(x=time, y=obs)) +
  geom_line() +
  geom_point(aes(colour=state_est)) +
  xlab("Time") +
  ylab(latex2exp::TeX("$\\Y_t")) +
  scale_color_brewer("State", palette = "Dark2")

g


# ggsave("../figures/simulated_corrupt_normal_state_3.pdf", g, width = 6,
#        height = 3)

Fit a model with rubbish collection state.

data_stan <- list(
  N=nrow(df),
  dist=df$obs,
  K=2,
  cost=10,
  p_anomalous=0.01,
  conc_anomalous=100
)
model <- stan_model("hmm_bernoulli.stan")
fit <- optimise_repeat(5, model, data_stan)

df$state_est <- fit$par$state
df$state_est[1] <- 1

g <- df %>%
  mutate(state_est=as.factor(state_est)) %>% 
  ggplot(aes(x=time, y=obs)) +
  geom_line() +
  geom_point(aes(colour=state_est)) +
  xlab("Time") +
  ylab(latex2exp::TeX("$\\Y_t")) +
  scale_color_brewer("State", palette = "Dark2")

g


# ggsave("../figures/simulated_corrupt_normal_state_3.pdf", g, width = 6,
#        height = 3)
fit$par$phi
[1] 0.01184509
fit$par$mu
[1] 0.9560906 8.0321222
fit$par$sigma
[1] 1.198069 1.321352

2D HMM

Fit 2D robust HMM

data_stan <- list(
  N=nrow(df),
  dist=x %>% as.matrix(),
  K=2,
  cost=7,
  p_anomalous=0.1,
  conc_anomalous=100
)
model <- stan_model("hmm_bernoulli_2d.stan")
fit <- optimise_repeat(10, model, data_stan)

g <- x %>% 
  mutate(state=fit$par$state) %>% 
  ggplot(aes(x=V1, y=V2)) +
  geom_point(aes(colour=as.factor(state))) +
  xlab(latex2exp::TeX("$\\Y_{1t}")) +
  ylab(latex2exp::TeX("$\\Y_{2t}")) +
  scale_color_brewer("State", palette = "Dark2")
g


# ggsave("../figures/simulated_2d_corrupt_rubbish.pdf", g, width = 6,
#        height = 3)
fit_r <- fit
fit$par$mu
            [,1]        [,2]
[1,]  7.88259312  7.96074249
[2,] -0.04429909 -0.08280655
fit$par$sigma
[1] 1.015293 1.038413

Try standard HMM with 2 states

fit_2 <- fit
fit$par$mu
           [,1]       [,2]
[1,]  7.5813273  7.6541182
[2,] -0.2256035 -0.2759647
fit$par$sigma
[1] 1.541746 1.581182

Try standard HMM with 3 states

fit_3 <- fit
fit$par$mu
           [,1]      [,2]
[1,] -0.3135811 -0.372200
[2,]  3.5225315  3.660930
[3,]  7.8946867  7.964446
fit$par$sigma
[1] 1.295070 1.335942

Plot state cleanliness

mu_r <- fit_r$par$mu
mu_r <- mu_r[c(2, 1), ]
mu_2 <- fit_2$par$mu
mu_2 <- mu_2[c(2, 1), ]
mu_3 <- fit_3$par$mu[c(1, 3), ]
mu_true <- matrix(c(0, 0, 8, 8), ncol=2, byrow = TRUE)

tmp <- tribble(
  ~y1, ~y2, ~state, ~label,
  mu_true[1, 1], mu_true[1, 2], 1, "truth",
  mu_true[2, 1], mu_true[2, 2], 2, "truth",
  mu_r[1, 1], mu_r[1, 2], 1, "rubbish",
  mu_r[2, 1], mu_r[2, 2], 2, "rubbish",
  mu_2[1, 1], mu_2[1, 2], 1, "2-state normal",
  mu_2[2, 1], mu_2[2, 2], 2, "2-state normal",
  mu_3[1, 1], mu_3[1, 2], 1, "3-state normal",
  mu_3[2, 1], mu_3[2, 2], 2, "3-state normal"
) %>% 
  mutate(state=as.factor(state))

g <- tmp %>% 
  ggplot(aes(x=y1, y=y2)) +
  geom_point(data=tmp %>% filter(label=="truth"), colour="red") +
  geom_point(aes(shape=label), size=2) +
  xlab(latex2exp::TeX("$\\Y_{1t}")) +
  ylab(latex2exp::TeX("$\\Y_{2t}")) +
  scale_color_brewer("State", palette = "Dark2") +
  facet_wrap(~state, scales="free")
g

ggsave("../figures/state_cleanliness_mean.pdf", g)
Saving 5.64 x 3.48 in image

Output covariances for visualisation in Mathematica

Apply rubbish collection to real data

x <- read.csv("../data/hmm_test.csv", header = FALSE)

x <- x %>% 
  slice(1:nrow(x))
df_1 <- df %>% 
  filter(id == 10) %>% 
  mutate(dist = dist /max(dist)) %>% 
  mutate(time=seq_along(x)) %>% 
  slice(1:4000)

df_1 %>% 
  select(time, dist, angle) %>% 
  pivot_longer(-time) %>% 
  ggplot(aes(x=time, y=value)) +
  geom_line() +
  facet_wrap(~name, scales="free")

Fit normal model

data_stan <- list(
  N=nrow(df_1),
  dist=df_1$dist,
  angle=df_1$angle,
  K=2,
  cost=10,
  p_anomalous=0.01,
  conc_anomalous=100
)
model <- stan_model("hmm_wrapped_cauchy.stan")
DIAGNOSTIC(S) FROM PARSER:
Info:
Left-hand side of sampling statement (~) may contain a non-linear transform of a parameter or local variable.
If it does, you need to include a target += statement with the log absolute determinant of the Jacobian of the transform.
Left-hand-side of sampling statement:
    B_step[1] ~ normal(...)
Info:
Left-hand side of sampling statement (~) may contain a non-linear transform of a parameter or local variable.
If it does, you need to include a target += statement with the log absolute determinant of the Jacobian of the transform.
Left-hand-side of sampling statement:
    B_step[2] ~ normal(...)
fit <- optimise_repeat(5, model, data_stan)

df_1 %>% 
  select(time, dist, angle) %>% 
  mutate(state=as.factor(fit$par$state)) %>% 
  pivot_longer(-c(time, state)) %>% 
  ggplot(aes(x=time, y=value, colour=state)) +
  geom_line(aes(group=1)) +
  facet_wrap(~name, scales="free")

Using rubbish collector

data_stan <- list(
  N=nrow(df_1),
  dist=df_1$dist,
  angle=df_1$angle,
  K=2,
  cost=0.5,
  p_anomalous=0.1,
  conc_anomalous=10
)
model <- stan_model("hmm_bernoulli_wrapped_cauchy.stan")
DIAGNOSTIC(S) FROM PARSER:
Info:
Left-hand side of sampling statement (~) may contain a non-linear transform of a parameter or local variable.
If it does, you need to include a target += statement with the log absolute determinant of the Jacobian of the transform.
Left-hand-side of sampling statement:
    B_step[1] ~ normal(...)
Info:
Left-hand side of sampling statement (~) may contain a non-linear transform of a parameter or local variable.
If it does, you need to include a target += statement with the log absolute determinant of the Jacobian of the transform.
Left-hand-side of sampling statement:
    B_step[2] ~ normal(...)
fit <- optimise_repeat(2, model, data_stan)

df_1 %>% 
  select(time, dist, angle) %>% 
  mutate(state=as.factor(fit$par$state)) %>% 
  pivot_longer(-c(time, state)) %>% 
  ggplot(aes(x=time, y=value, colour=state)) +
  geom_line(aes(group=1)) +
  scale_color_brewer("State", palette = "Dark2") +
  facet_wrap(~name, scales="free")

g <- df_1 %>% 
  select(time, dist, angle) %>% 
  mutate(state=as.factor(fit$par$state)) %>% 
  pivot_longer(-c(time, state)) %>% 
  ggplot(aes(x=value, fill=state)) +
  stat_density(position="identity", alpha=0.5) +
  scale_fill_brewer("State", palette = "Dark2") +
  facet_wrap(~name, scales="free")
g
ggsave("../figures/angle_dist_separate_rubbish.pdf", g, width = 10, height = 4)

df_1 %>% 
  select(time, dist, angle) %>% 
  mutate(state=as.factor(fit$par$state)) %>% 
  ggplot(aes(x=dist, y=angle)) +
  geom_density_2d(aes(color=state))

---
title: "HMM Bernoulli"
output: html_notebook
---

```{r}
source("helper.R")
```

Generate data with imperfections
```{r}
mus <- c(1, 8)
sigmas <- c(1, 1)
dfs <- c(1.2, 1.2)
K <- 2
m_transition_probs <- matrix(c(0.9, 0.1, 0.1, 0.9), ncol = 2)

n_steps <- 1000
df <- simulate_hmm(n_steps, 1, m_transition_probs, 0.2, mus, sigmas, dfs)

g <- df %>% 
  ggplot(aes(x=time, y=obs)) +
  geom_line() +
  geom_point(aes(colour=state)) +
  xlab("Time") +
  ylab(latex2exp::TeX("$\\Y_t")) +
  scale_color_brewer("State", palette = "Dark2")
g

ggsave("../figures/simulated_corrupt.pdf", g, width = 6,
       height = 3)
```

Fit normal HMM with 2 states
```{r}
model <- stan_model("hmm.stan")

data_stan <- list(
  N=nrow(df),
  dist=df$obs,
  K=2,
  cost=10,
  p_anomalous=0.01,
  conc_anomalous=100
)

fit <- optimise_repeat(5, model, data_stan)

# some issue with algo for t=1
df$state_est <- fit$par$state
df$state_est[1] <- 1

g <- df %>%
  mutate(state_est=as.factor(state_est)) %>% 
  ggplot(aes(x=time, y=obs)) +
  geom_line() +
  geom_point(aes(colour=state_est)) +
  xlab("Time") +
  ylab(latex2exp::TeX("$\\Y_t")) +
  scale_color_brewer("State", palette = "Dark2")

g
ggsave("../figures/simulated_corrupt_normal_state.pdf", g, width = 6,
       height = 3)
```

Try normal model with 3 states
```{r}
model <- stan_model("hmm.stan")

data_stan <- list(
  N=nrow(df),
  dist=df$obs,
  K=3,
  cost=10,
  p_anomalous=0.01,
  conc_anomalous=100
)

fit <- optimise_repeat(5, model, data_stan)

df$state_est <- fit$par$state
df$state_est[1] <- 1

g <- df %>%
  mutate(state_est=as.factor(state_est)) %>% 
  ggplot(aes(x=time, y=obs)) +
  geom_line() +
  geom_point(aes(colour=state_est)) +
  xlab("Time") +
  ylab(latex2exp::TeX("$\\Y_t")) +
  scale_color_brewer("State", palette = "Dark2")

g

ggsave("../figures/simulated_corrupt_normal_state_3.pdf", g, width = 6,
       height = 3)
```


Fit a model with rubbish collection state.
```{r}
data_stan <- list(
  N=nrow(df),
  dist=df$obs,
  K=2,
  cost=10,
  p_anomalous=0.01,
  conc_anomalous=100
)
model <- stan_model("hmm_bernoulli.stan")
fit <- optimise_repeat(5, model, data_stan)

df$state_est <- fit$par$state
df$state_est[1] <- 1

g <- df %>%
  mutate(state_est=as.factor(state_est)) %>% 
  ggplot(aes(x=time, y=obs)) +
  geom_line() +
  geom_point(aes(colour=state_est)) +
  xlab("Time") +
  ylab(latex2exp::TeX("$\\Y_t")) +
  scale_color_brewer("State", palette = "Dark2")

g

ggsave("../figures/simulated_corrupt_rubbish.pdf", g, width = 6,
       height = 3)
```

```{r}
fit$par$phi
fit$par$mu
fit$par$sigma
```


# 2D HMM
```{r}
states <- simulate_hmm_states(n_steps, 1, m_transition_probs)
library(mvtnorm)
rmvrnorm2D <- function(n, mux, muy, sigmax, sigmay, rho){
  return(rmvnorm(n, c(mux, muy),
                 matrix(c(sigmax^2, sigmax * sigmay * rho,
                          sigmax * sigmay * rho, sigmay^2),
                        ncol = 2)))
}
rmvt2D <- function(n, mux, muy, sigmax, sigmay, rho, df){
  return(rmvt(n,
              matrix(c(sigmax^2, sigmax * sigmay * rho,
                       sigmax * sigmay * rho, sigmay^2),
                     ncol = 2),
              df,
              c(mux, muy)))
}

x <- matrix(nrow = length(states), ncol=2)
p1_corruption <- 0.05
p2_corruption <- 0.1
for(i in seq_along(states)) {
  u <- runif(1)
  if(states[i] == 1) {
    if(u < p1_corruption) { 
      x[i, ] <- rmvrnorm2D(1, -5, -5, 2, 2, 0)
    } else {
      x[i, ] <- rmvrnorm2D(1, 0, 0, 1, 1, 0)
    }
  } else {
    if(u < p2_corruption) { 
      x[i, ] <- rmvrnorm2D(1, 4, 4, 1, 1, 0.8)
    } else {
      x[i, ] <- rmvrnorm2D(1, 8, 8, 1, 1, 0)
    }
  }
}

x <- x %>% 
  as.data.frame()

g <- x %>% 
  mutate(state=states) %>% 
  ggplot(aes(x=V1, y=V2)) +
  geom_point(aes(colour=as.factor(state))) +
  xlab(latex2exp::TeX("$\\Y_{1t}")) +
  ylab(latex2exp::TeX("$\\Y_{2t}")) +
  scale_color_brewer("State", palette = "Dark2")
g

ggsave("../figures/simulated_2d_corrupt.pdf", g, width = 6,
       height = 3)
```

Fit 2D robust HMM
```{r}
data_stan <- list(
  N=nrow(df),
  dist=x %>% as.matrix(),
  K=2,
  cost=7,
  p_anomalous=0.1,
  conc_anomalous=100
)
model <- stan_model("hmm_bernoulli_2d.stan")
fit <- optimise_repeat(10, model, data_stan)
fit_r <- fit

g <- x %>% 
  mutate(state=fit_r$par$state) %>% 
  ggplot(aes(x=V1, y=V2)) +
  geom_point(aes(colour=as.factor(state))) +
  xlab(latex2exp::TeX("$\\Y_{1t}")) +
  ylab(latex2exp::TeX("$\\Y_{2t}")) +
  scale_color_brewer("State", palette = "Dark2")
g

ggsave("../figures/simulated_2d_corrupt_rubbish.pdf", g, width = 6,
       height = 3)
```

```{r}
fit$par$mu
fit$par$sigma
```

Try standard HMM with 2 states
```{r}
data_stan <- list(
  N=nrow(df),
  dist=x %>% as.matrix(),
  K=2,
  cost=7,
  p_anomalous=0.1,
  conc_anomalous=100
)

model <- stan_model("hmm_2d.stan")
fit <- optimise_repeat(10, model, data_stan)
fit_2 <- fit

# manual state cleaning
state_1 <- fit_2$par$state
state_c <- if_else(state_1==1, 2, 1)

g <- x %>% 
  mutate(state=state_c) %>% 
  ggplot(aes(x=V1, y=V2)) +
  geom_point(aes(colour=as.factor(state_c))) +
  xlab(latex2exp::TeX("$\\Y_{1t}")) +
  ylab(latex2exp::TeX("$\\Y_{2t}")) +
  scale_color_brewer("State", palette = "Dark2")
g

ggsave("../figures/simulated_2d_corrupt_normal_2.pdf", g, width = 6,
       height = 3)
```

```{r}
fit$par$mu
fit$par$sigma
```


Try standard HMM with 3 states
```{r}
data_stan <- list(
  N=nrow(df),
  dist=x %>% as.matrix(),
  K=3,
  cost=7,
  p_anomalous=0.1,
  conc_anomalous=100
)

model <- stan_model("hmm_2d.stan")
fit <- optimise_repeat(10, model, data_stan)
fit_3 <- fit

# manual state cleaning
state_1 <- fit_3$par$state
state_c <- if_else(state_1==1, if_else(state_1), 1)

g <- x %>% 
  mutate(state=state_c) %>% 
  ggplot(aes(x=V1, y=V2)) +
  geom_point(aes(colour=as.factor(state_1))) +
  xlab(latex2exp::TeX("$\\Y_{1t}")) +
  ylab(latex2exp::TeX("$\\Y_{2t}")) +
  scale_color_brewer("State", palette = "Dark2")
g

ggsave("../figures/simulated_2d_corrupt_normal_3.pdf", g, width = 6,
       height = 3)
```

```{r}
fit$par$mu
fit$par$sigma
```

Plot state cleanliness
```{r}
mu_r <- fit_r$par$mu
mu_r <- mu_r[c(2, 1), ]
mu_2 <- fit_2$par$mu
mu_2 <- mu_2[c(2, 1), ]
mu_3 <- fit_3$par$mu[c(1, 3), ]
mu_true <- matrix(c(0, 0, 8, 8), ncol=2, byrow = TRUE)

tmp <- tribble(
  ~y1, ~y2, ~state, ~label,
  mu_true[1, 1], mu_true[1, 2], 1, "truth",
  mu_true[2, 1], mu_true[2, 2], 2, "truth",
  mu_r[1, 1], mu_r[1, 2], 1, "rubbish",
  mu_r[2, 1], mu_r[2, 2], 2, "rubbish",
  mu_2[1, 1], mu_2[1, 2], 1, "2-state normal",
  mu_2[2, 1], mu_2[2, 2], 2, "2-state normal",
  mu_3[1, 1], mu_3[1, 2], 1, "3-state normal",
  mu_3[2, 1], mu_3[2, 2], 2, "3-state normal"
) %>% 
  mutate(state=as.factor(state))

g <- tmp %>% 
  ggplot(aes(x=y1, y=y2)) +
  geom_point(data=tmp %>% filter(label=="truth"), colour="red") +
  geom_point(aes(shape=label), size=2) +
  xlab(latex2exp::TeX("$\\Y_{1t}")) +
  ylab(latex2exp::TeX("$\\Y_{2t}")) +
  scale_color_brewer("State", palette = "Dark2") +
  facet_wrap(~state, scales="free")
g

ggsave("../figures/state_cleanliness_mean.pdf", g)
```
Output covariances for visualisation in Mathematica
```{r}
sigma_r <- fit_r$par$sigma
sigma_2 <- fit_2$par$sigma
sigma_3 <- fit_3$par$sigma
```


# Apply rubbish collection to real data
```{r}
df <- read.csv("../data/ElephantsData_ano.csv",sep=";")
df <- df %>% 
  rename_all(tolower)
```


```{r}
df_1 <- df %>% 
  filter(id == 10) %>% 
  mutate(dist = dist /max(dist)) %>% 
  mutate(time=seq_along(x)) %>% 
  slice(1:4000)

df_1 %>% 
  select(time, dist, angle) %>% 
  pivot_longer(-time) %>% 
  ggplot(aes(x=time, y=value)) +
  geom_line() +
  facet_wrap(~name, scales="free")
```
Fit normal model
```{r}
data_stan <- list(
  N=nrow(df_1),
  dist=df_1$dist,
  angle=df_1$angle,
  K=2,
  cost=10,
  p_anomalous=0.01,
  conc_anomalous=100
)
model <- stan_model("hmm_wrapped_cauchy.stan")
fit <- optimise_repeat(5, model, data_stan)

df_1 %>% 
  select(time, dist, angle) %>% 
  mutate(state=as.factor(fit$par$state)) %>% 
  pivot_longer(-c(time, state)) %>% 
  ggplot(aes(x=time, y=value, colour=state)) +
  geom_line(aes(group=1)) +
  facet_wrap(~name, scales="free")
```

```{r}
df_1 %>% 
  select(time, dist, angle) %>% 
  mutate(state=as.factor(fit$par$state)) %>% 
  pivot_longer(-c(time, state)) %>% 
  ggplot(aes(x=value, fill=state)) +
  stat_density(position="identity", alpha=0.3) +
  facet_wrap(~name, scales="free")
```

Using rubbish collector
```{r}
data_stan <- list(
  N=nrow(df_1),
  dist=df_1$dist,
  angle=df_1$angle,
  K=2,
  cost=0.5,
  p_anomalous=0.1,
  conc_anomalous=10
)
model <- stan_model("hmm_bernoulli_wrapped_cauchy.stan")
fit <- optimise_repeat(2, model, data_stan)

df_1 %>% 
  select(time, dist, angle) %>% 
  mutate(state=as.factor(fit$par$state)) %>% 
  pivot_longer(-c(time, state)) %>% 
  ggplot(aes(x=time, y=value, colour=state)) +
  geom_line(aes(group=1)) +
  scale_color_brewer("State", palette = "Dark2") +
  facet_wrap(~name, scales="free")
```


```{r}
g <- df_1 %>% 
  select(time, dist, angle) %>% 
  mutate(state=as.factor(fit$par$state)) %>% 
  pivot_longer(-c(time, state)) %>% 
  ggplot(aes(x=value, fill=state)) +
  stat_density(position="identity", alpha=0.5) +
  scale_fill_brewer("State", palette = "Dark2") +
  facet_wrap(~name, scales="free")
g
ggsave("../figures/angle_dist_separate_rubbish.pdf", g, width = 10, height = 4)
```


```{r}
g <- df_1 %>% 
  select(time, dist, angle) %>% 
  mutate(state=as.factor(fit$par$state)) %>% 
  ggplot(aes(x=dist, y=angle)) +
  geom_point(aes(colour=state), size=1) +
  scale_colour_brewer("State", palette = "Dark2") +
  xlab("Distance") +
  ylab("Angle") +
  facet_wrap(~state)
g
ggsave("../figures/angle_dist_2d_rubbish.pdf", g, width = 10, height = 4)
```

```{r}
df_1 %>% 
  select(time, dist, angle) %>% 
  mutate(state=as.factor(fit$par$state)) %>% 
  ggplot(aes(x=dist, y=angle)) +
  geom_density_2d(aes(color=state))
```

